The objective of this notebook is to refine the clustering annotation done at level 3. This refinement is the result of a manual curation carried out by specialists to remove poor quality cells, misclassified cells or clusters with very few cells.
library(Seurat)
library(Signac)
library(tidyverse)
library(reshape2)
library(ggpubr)
library(harmony)
library(plotly)
set.seed(1234)
cell_type = "GCBC"
# Paths
path_to_obj <- str_c(
here::here("scATAC-seq/results/R_objects/level_3/"),
cell_type,
"/",
cell_type,
"_integrated_level_3.rds",
sep = ""
)
path_to_obj_RNA <- str_c(
here::here("scRNA-seq/3-clustering/4-level_4/"),
cell_type,
"/",
cell_type,
"_clustered_clean_level_4_with_final_clusters.rds",
sep = ""
)
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# Colors
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D",
"#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0",
"#1C8356", "#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E", "#FE00FA",
"#FBE426", "#16FF32", "black", "#3283FE",
"#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D", "#3B00FB",
"#822E1C", "coral2", "#1CFFCE", "#1CBE4F",
"#3283FE", "#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
"#FEAF16", "#683B79", "#B10DA1", "#1C7F93",
"#F8A19F", "dark orange", "#FEAF16", "#FBE426",
"Brown")
path_to_level_4 <- here::here("scATAC-seq/results/R_objects/level_4/GCBC/")
path_to_save <- str_c(path_to_level_4, "GCBC_integrated_level_4.rds")
# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 270784 features across 23893 samples within 1 assay
## Active assay: peaks_macs (270784 features, 262136 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
seurat_RNA <- readRDS(path_to_obj_RNA)
seurat_RNA
## An object of class Seurat
## 37378 features across 72174 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
DimPlot(seurat,
pt.size = 0.1, split.by = "assay")
DimPlot(seurat,
pt.size = 0.1, split.by = "age_group")
p1 <- DimPlot(seurat,
pt.size = 0.1) + NoLegend()
p2 <- DimPlot(seurat_RNA,
pt.size = 0.1,cols = color_palette)
p1 + p2
tonsil_RNA_annotation <- seurat_RNA@meta.data %>%
rownames_to_column(var = "cell_barcode") %>%
dplyr::filter(assay == "multiome") %>%
dplyr::select("cell_barcode", "cluster8_subcluster")
tonsil_ATAC_cell_barcode <- seurat@meta.data %>%
rownames_to_column(var = "cell_barcode") %>%
dplyr::filter(assay == "multiome") %>%
dplyr::select("cell_barcode")
possible_doublets_ATAC <- setdiff(tonsil_ATAC_cell_barcode$cell_barcode,tonsil_RNA_annotation$cell_barcode)
seurat$quality_cells <- ifelse(colnames(seurat) %in% possible_doublets_ATAC, "Poor-quality", "Good-quality")
DimPlot(
seurat,
split.by = "quality_cells")
DimPlot(seurat,split.by = "scrublet_predicted_doublet_atac")
table(seurat$scrublet_predicted_doublet_atac)
##
## FALSE TRUE
## 22375 1518
qc_vars <- c(
"nCount_peaks",
"nFeature_peaks",
"nucleosome_signal",
"TSS.enrichment"
)
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, features = x, max.cutoff = "q95")
p
})
qc_gg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
qc_vars <- c("NBC.MBC", "GCBC", "PC", "CD4.T", "Cytotoxic")
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, feature = x,
max.cutoff = 4, min.cutoff = -4) +
scale_color_viridis_c(option = "magma")
p
})
qc_gg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
resolutions <- c(0.1, 0.12, 0.2, 0.3 , 0.5, 0.75)
seurat <- FindClusters(seurat, resolution = resolutions)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23893
## Number of edges: 619663
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9099
## Number of communities: 10
## Elapsed time: 3 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23893
## Number of edges: 619663
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8939
## Number of communities: 11
## Elapsed time: 2 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23893
## Number of edges: 619663
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8659
## Number of communities: 14
## Elapsed time: 2 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23893
## Number of edges: 619663
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8491
## Number of communities: 15
## Elapsed time: 2 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23893
## Number of edges: 619663
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8215
## Number of communities: 18
## Elapsed time: 2 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23893
## Number of edges: 619663
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7955
## Number of communities: 22
## Elapsed time: 2 seconds
vars <- str_c("peaks_macs_snn_res.", resolutions)
umap_clusters <- purrr::map(vars, function(x) {
p <- DimPlot(seurat, group.by = x, cols = color_palette)
p
})
umap_clusters
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]